{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The MIT License (MIT)\n",
    "\n",
    "# Copyright (c) 2020, NVIDIA CORPORATION.\n",
    "\n",
    "# Permission is hereby granted, free of charge, to any person obtaining a copy of\n",
    "# this software and associated documentation files (the \"Software\"), to deal in\n",
    "# the Software without restriction, including without limitation the rights to\n",
    "# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of\n",
    "# the Software, and to permit persons to whom the Software is furnished to do so,\n",
    "# subject to the following conditions:\n",
    "\n",
    "# The above copyright notice and this permission notice shall be included in all\n",
    "# copies or substantial portions of the Software.\n",
    "\n",
    "# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n",
    "# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS\n",
    "# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR\n",
    "# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER\n",
    "# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n",
    "# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Tutorial: Feature Engineering for Recommender Systems\n",
    "\n",
    "# 4. Feature Engineering - Numerical\n",
    "\n",
    "## 4.2. Normalization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import IPython\n",
    "\n",
    "import pandas as pd\n",
    "import cudf\n",
    "import numpy as np\n",
    "import cupy\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "df_train = cudf.read_parquet('./data/train.parquet')\n",
    "df_valid = cudf.read_parquet('./data/valid.parquet')\n",
    "df_test = cudf.read_parquet('./data/test.parquet')\n",
    "\n",
    "df_train['brand'] = df_train['brand'].fillna('UNKNOWN')\n",
    "df_valid['brand'] = df_valid['brand'].fillna('UNKNOWN')\n",
    "df_test['brand'] = df_test['brand'].fillna('UNKNOWN')\n",
    "\n",
    "df_train['cat_0'] = df_train['cat_0'].fillna('UNKNOWN')\n",
    "df_valid['cat_0'] = df_valid['cat_0'].fillna('UNKNOWN')\n",
    "df_test['cat_0'] = df_test['cat_0'].fillna('UNKNOWN')\n",
    "\n",
    "df_train['cat_1'] = df_train['cat_1'].fillna('UNKNOWN')\n",
    "df_valid['cat_1'] = df_valid['cat_1'].fillna('UNKNOWN')\n",
    "df_test['cat_1'] = df_test['cat_1'].fillna('UNKNOWN')\n",
    "\n",
    "df_train['cat_2'] = df_train['cat_2'].fillna('UNKNOWN')\n",
    "df_valid['cat_2'] = df_valid['cat_2'].fillna('UNKNOWN')\n",
    "df_test['cat_2'] = df_test['cat_2'].fillna('UNKNOWN')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We generate some numerical features with the feature engineering from the previous notebooks."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def target_encode(train, valid, col, target, kfold=5, smooth=20, gpu=True):\n",
    "    \"\"\"\n",
    "        train:  train dataset\n",
    "        valid:  validation dataset\n",
    "        col:   column which will be encoded (in the example RESOURCE)\n",
    "        target: target column which will be used to calculate the statistic\n",
    "    \"\"\"\n",
    "    \n",
    "    # We assume that the train dataset is shuffled\n",
    "    train['kfold'] = ((train.index) % kfold)\n",
    "    # We keep the original order as cudf merge will not preserve the original order\n",
    "    if gpu:\n",
    "        train['org_sorting'] = cupy.arange(len(train), dtype=\"int32\")\n",
    "    else:\n",
    "        train['org_sorting'] = np.arange(len(train), dtype=\"int32\")\n",
    "    # We create the output column, we fill with 0\n",
    "    col_name = '_'.join(col)\n",
    "    train['TE_' + col_name] = 0.\n",
    "    for i in range(kfold):\n",
    "        ###################################\n",
    "        # filter for out of fold\n",
    "        # calculate the mean/counts per group category\n",
    "        # calculate the global mean for the oof\n",
    "        # calculate the smoothed TE\n",
    "        # merge it to the original dataframe\n",
    "        ###################################\n",
    "        \n",
    "        df_tmp = train[train['kfold']!=i]\n",
    "        mn = df_tmp[target].mean()\n",
    "        df_tmp = df_tmp[col + [target]].groupby(col).agg(['mean', 'count']).reset_index()\n",
    "        df_tmp.columns = col + ['mean', 'count']\n",
    "        df_tmp['TE_tmp'] = ((df_tmp['mean']*df_tmp['count'])+(mn*smooth)) / (df_tmp['count']+smooth)\n",
    "        df_tmp_m = train[col + ['kfold', 'org_sorting', 'TE_' + col_name]].merge(df_tmp, how='left', left_on=col, right_on=col).sort_values('org_sorting')\n",
    "        df_tmp_m.loc[df_tmp_m['kfold']==i, 'TE_' + col_name] = df_tmp_m.loc[df_tmp_m['kfold']==i, 'TE_tmp']\n",
    "        train['TE_' + col_name] = df_tmp_m['TE_' + col_name].fillna(mn).values\n",
    "\n",
    "    \n",
    "    ###################################\n",
    "    # calculate the mean/counts per group for the full training dataset\n",
    "    # calculate the global mean\n",
    "    # calculate the smoothed TE\n",
    "    # merge it to the original dataframe\n",
    "    # drop all temp columns\n",
    "    ###################################    \n",
    "    \n",
    "    df_tmp = train[col + [target]].groupby(col).agg(['mean', 'count']).reset_index()\n",
    "    mn = train[target].mean()\n",
    "    df_tmp.columns = col + ['mean', 'count']\n",
    "    df_tmp['TE_tmp'] = ((df_tmp['mean']*df_tmp['count'])+(mn*smooth)) / (df_tmp['count']+smooth)\n",
    "    if gpu:\n",
    "        valid['org_sorting'] = cupy.arange(len(valid), dtype=\"int32\")\n",
    "    else:\n",
    "        valid['org_sorting'] = np.arange(len(valid), dtype=\"int32\")\n",
    "    df_tmp_m = valid[col + ['org_sorting']].merge(df_tmp, how='left', left_on=col, right_on=col).sort_values('org_sorting')\n",
    "    valid['TE_' + col_name] = df_tmp_m['TE_tmp'].fillna(mn).values\n",
    "    \n",
    "    valid = valid.drop('org_sorting', axis=1)\n",
    "    train = train.drop('kfold', axis=1)\n",
    "    train = train.drop('org_sorting', axis=1)\n",
    "    return(train, valid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['event_time', 'event_type', 'product_id', 'brand', 'price', 'user_id',\n",
       "       'user_session', 'target', 'cat_0', 'cat_1', 'cat_2', 'cat_3',\n",
       "       'timestamp', 'ts_hour', 'ts_minute', 'ts_weekday', 'ts_day', 'ts_month',\n",
       "       'ts_year'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "cats = [['cat_0'], ['cat_1'], ['cat_2'], ['cat_0', 'cat_1', 'cat_2'], ['ts_hour'], ['ts_weekday'], ['ts_weekday', 'ts_hour', 'cat_2', 'brand']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "for cat in cats:\n",
    "    df_train, df_valid = target_encode(df_train, df_valid, cat, 'target')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "cats = ['brand', 'user_id', 'product_id', 'cat_0', 'cat_1', 'cat_2']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def count_encode(train, valid, col, gpu=True):\n",
    "    \"\"\"\n",
    "        train:  train dataset\n",
    "        valid:  validation dataset\n",
    "        col:    column which will be count encoded (in the example RESOURCE)\n",
    "    \"\"\"\n",
    "    # We keep the original order as cudf merge will not preserve the original order\n",
    "    if gpu:\n",
    "        train['org_sorting'] = cupy.arange(len(train), dtype=\"int32\")\n",
    "    else:\n",
    "        train['org_sorting'] = np.arange(len(train), dtype=\"int32\")\n",
    "    \n",
    "    train_tmp = train[col].value_counts().reset_index()\n",
    "    train_tmp.columns = [col,  'CE_' + col]\n",
    "    df_tmp = train[[col, 'org_sorting']].merge(train_tmp, how='left', left_on=col, right_on=col).sort_values('org_sorting')\n",
    "    train['CE_' + col] = df_tmp['CE_' + col].fillna(0).values\n",
    "        \n",
    "    if gpu:\n",
    "        valid['org_sorting'] = cupy.arange(len(valid), dtype=\"int32\")\n",
    "    else:\n",
    "        valid['org_sorting'] = np.arange(len(valid), dtype=\"int32\")\n",
    "    df_tmp = valid[[col, 'org_sorting']].merge(train_tmp, how='left', left_on=col, right_on=col).sort_values('org_sorting')\n",
    "    valid['CE_' + col] = df_tmp['CE_' + col].fillna(0).values\n",
    "    \n",
    "    valid = valid.drop('org_sorting', axis=1)\n",
    "    train = train.drop('org_sorting', axis=1)\n",
    "    return(train, valid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 648 ms, sys: 1.26 s, total: 1.91 s\n",
      "Wall time: 1.91 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "for cat in cats:\n",
    "    df_train, df_valid = count_encode(df_train, df_valid, cat, gpu=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>event_time</th>\n",
       "      <th>event_type</th>\n",
       "      <th>product_id</th>\n",
       "      <th>brand</th>\n",
       "      <th>price</th>\n",
       "      <th>user_id</th>\n",
       "      <th>user_session</th>\n",
       "      <th>target</th>\n",
       "      <th>cat_0</th>\n",
       "      <th>cat_1</th>\n",
       "      <th>...</th>\n",
       "      <th>TE_cat_0_cat_1_cat_2</th>\n",
       "      <th>TE_ts_hour</th>\n",
       "      <th>TE_ts_weekday</th>\n",
       "      <th>TE_ts_weekday_ts_hour_cat_2_brand</th>\n",
       "      <th>CE_brand</th>\n",
       "      <th>CE_user_id</th>\n",
       "      <th>CE_product_id</th>\n",
       "      <th>CE_cat_0</th>\n",
       "      <th>CE_cat_1</th>\n",
       "      <th>CE_cat_2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2019-12-01 00:00:28 UTC</td>\n",
       "      <td>cart</td>\n",
       "      <td>17800342</td>\n",
       "      <td>zeta</td>\n",
       "      <td>66.90</td>\n",
       "      <td>550465671</td>\n",
       "      <td>22650a62-2d9c-4151-9f41-2674ec6d32d5</td>\n",
       "      <td>0</td>\n",
       "      <td>computers</td>\n",
       "      <td>desktop</td>\n",
       "      <td>...</td>\n",
       "      <td>0.280155</td>\n",
       "      <td>0.305423</td>\n",
       "      <td>0.410060</td>\n",
       "      <td>0.301241</td>\n",
       "      <td>10859</td>\n",
       "      <td>9</td>\n",
       "      <td>743</td>\n",
       "      <td>372964</td>\n",
       "      <td>51652</td>\n",
       "      <td>5058060</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2019-12-01 00:00:39 UTC</td>\n",
       "      <td>cart</td>\n",
       "      <td>3701309</td>\n",
       "      <td>polaris</td>\n",
       "      <td>89.32</td>\n",
       "      <td>543733099</td>\n",
       "      <td>a65116f4-ac53-4a41-ad68-6606788e674c</td>\n",
       "      <td>0</td>\n",
       "      <td>appliances</td>\n",
       "      <td>environment</td>\n",
       "      <td>...</td>\n",
       "      <td>0.350069</td>\n",
       "      <td>0.305249</td>\n",
       "      <td>0.410061</td>\n",
       "      <td>0.333539</td>\n",
       "      <td>50273</td>\n",
       "      <td>56</td>\n",
       "      <td>12</td>\n",
       "      <td>1527338</td>\n",
       "      <td>287043</td>\n",
       "      <td>213674</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2019-12-01 00:00:40 UTC</td>\n",
       "      <td>cart</td>\n",
       "      <td>3701309</td>\n",
       "      <td>polaris</td>\n",
       "      <td>89.32</td>\n",
       "      <td>543733099</td>\n",
       "      <td>a65116f4-ac53-4a41-ad68-6606788e674c</td>\n",
       "      <td>0</td>\n",
       "      <td>appliances</td>\n",
       "      <td>environment</td>\n",
       "      <td>...</td>\n",
       "      <td>0.351989</td>\n",
       "      <td>0.305235</td>\n",
       "      <td>0.410059</td>\n",
       "      <td>0.319065</td>\n",
       "      <td>50273</td>\n",
       "      <td>56</td>\n",
       "      <td>12</td>\n",
       "      <td>1527338</td>\n",
       "      <td>287043</td>\n",
       "      <td>213674</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2019-12-01 00:00:41 UTC</td>\n",
       "      <td>cart</td>\n",
       "      <td>3701309</td>\n",
       "      <td>polaris</td>\n",
       "      <td>89.32</td>\n",
       "      <td>543733099</td>\n",
       "      <td>a65116f4-ac53-4a41-ad68-6606788e674c</td>\n",
       "      <td>0</td>\n",
       "      <td>appliances</td>\n",
       "      <td>environment</td>\n",
       "      <td>...</td>\n",
       "      <td>0.351410</td>\n",
       "      <td>0.305370</td>\n",
       "      <td>0.410061</td>\n",
       "      <td>0.333539</td>\n",
       "      <td>50273</td>\n",
       "      <td>56</td>\n",
       "      <td>12</td>\n",
       "      <td>1527338</td>\n",
       "      <td>287043</td>\n",
       "      <td>213674</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2019-12-01 00:01:56 UTC</td>\n",
       "      <td>cart</td>\n",
       "      <td>1004767</td>\n",
       "      <td>samsung</td>\n",
       "      <td>235.60</td>\n",
       "      <td>579970209</td>\n",
       "      <td>c6946211-ce70-4228-95ce-fd7fccdde63c</td>\n",
       "      <td>0</td>\n",
       "      <td>construction</td>\n",
       "      <td>tools</td>\n",
       "      <td>...</td>\n",
       "      <td>0.460389</td>\n",
       "      <td>0.305449</td>\n",
       "      <td>0.410061</td>\n",
       "      <td>0.466269</td>\n",
       "      <td>2323417</td>\n",
       "      <td>9</td>\n",
       "      <td>317711</td>\n",
       "      <td>3363367</td>\n",
       "      <td>3307872</td>\n",
       "      <td>3172781</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 32 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                event_time event_type  product_id    brand   price    user_id  \\\n",
       "0  2019-12-01 00:00:28 UTC       cart    17800342     zeta   66.90  550465671   \n",
       "1  2019-12-01 00:00:39 UTC       cart     3701309  polaris   89.32  543733099   \n",
       "2  2019-12-01 00:00:40 UTC       cart     3701309  polaris   89.32  543733099   \n",
       "3  2019-12-01 00:00:41 UTC       cart     3701309  polaris   89.32  543733099   \n",
       "4  2019-12-01 00:01:56 UTC       cart     1004767  samsung  235.60  579970209   \n",
       "\n",
       "                           user_session  target         cat_0        cat_1  \\\n",
       "0  22650a62-2d9c-4151-9f41-2674ec6d32d5       0     computers      desktop   \n",
       "1  a65116f4-ac53-4a41-ad68-6606788e674c       0    appliances  environment   \n",
       "2  a65116f4-ac53-4a41-ad68-6606788e674c       0    appliances  environment   \n",
       "3  a65116f4-ac53-4a41-ad68-6606788e674c       0    appliances  environment   \n",
       "4  c6946211-ce70-4228-95ce-fd7fccdde63c       0  construction        tools   \n",
       "\n",
       "   ... TE_cat_0_cat_1_cat_2  TE_ts_hour  TE_ts_weekday  \\\n",
       "0  ...             0.280155    0.305423       0.410060   \n",
       "1  ...             0.350069    0.305249       0.410061   \n",
       "2  ...             0.351989    0.305235       0.410059   \n",
       "3  ...             0.351410    0.305370       0.410061   \n",
       "4  ...             0.460389    0.305449       0.410061   \n",
       "\n",
       "   TE_ts_weekday_ts_hour_cat_2_brand  CE_brand  CE_user_id  CE_product_id  \\\n",
       "0                           0.301241     10859           9            743   \n",
       "1                           0.333539     50273          56             12   \n",
       "2                           0.319065     50273          56             12   \n",
       "3                           0.333539     50273          56             12   \n",
       "4                           0.466269   2323417           9         317711   \n",
       "\n",
       "   CE_cat_0  CE_cat_1  CE_cat_2  \n",
       "0    372964     51652   5058060  \n",
       "1   1527338    287043    213674  \n",
       "2   1527338    287043    213674  \n",
       "3   1527338    287043    213674  \n",
       "4   3363367   3307872   3172781  \n",
       "\n",
       "[5 rows x 32 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['event_time', 'event_type', 'product_id', 'brand', 'price', 'user_id',\n",
       "       'user_session', 'target', 'cat_0', 'cat_1', 'cat_2', 'cat_3',\n",
       "       'timestamp', 'ts_hour', 'ts_minute', 'ts_weekday', 'ts_day', 'ts_month',\n",
       "       'ts_year', 'TE_cat_0', 'TE_cat_1', 'TE_cat_2', 'TE_cat_0_cat_1_cat_2',\n",
       "       'TE_ts_hour', 'TE_ts_weekday', 'TE_ts_weekday_ts_hour_cat_2_brand',\n",
       "       'CE_brand', 'CE_user_id', 'CE_product_id', 'CE_cat_0', 'CE_cat_1',\n",
       "       'CE_cat_2'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Theory"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<b>*Normalization*</b> is required to enable neural networks to leverage numerical features. Tree-based models do not require normalization as they define the split independent of the scale of a feature. Without normalization, neural networks are difficult to train. The image visualizes the loss surface and the gradient updates for non-normalized input (left) and normalized input (right). <br><br>\n",
    "\n",
    "Source: https://www.jeremyjordan.me/batch-normalization/\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<img src='./images/normalization.png' width=50%>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The reason is that different numerical features have different scales. When we combine the features in a hidden layer, the different scales make it more difficult to extract patterns from it."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Normalization Techniques"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "After we outline the importance for normalizing the numerical input feature, we will discuss different strategy to achieve a normal distributed input feature:\n",
    "1. Normalization with mean/std\n",
    "2. Log-based normalization\n",
    "3. Scale to 0-1\n",
    "4. Gauss Rank (separate notebook)\n",
    "5. Power transfomer"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4.2.1 Normalization with mean/std"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The most common approach is to normalize a numerical feature by substracting the mean and divide the feature by the standard derviation:\n",
    "\n",
    "\\begin{equation} \\label{eq:normalize}\n",
    "X_{norm} = \\frac{X - mean_{X}}{\\sigma_{X}} \\sim \\mathcal{N}(0,\\,1)\\,\n",
    "\\end{equation}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = df_train['CE_product_id']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_norm = (X-X.mean())/X.std()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Our features does not follow a normal distribution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Text(0.5, 1.0, 'Histogram normalised')"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA7EAAADSCAYAAACcq0cwAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAqd0lEQVR4nO3df7hdVX3v+/dHEKWK/JCUgwkarDl60aci5EKsHq8FgUCr4Z6DXqynREvNbQWPPff2KrQ9xaq02OecWqhKL0JqsLaAnFpSC8aI8nhsCxIEkR9SNhEuSfkRCT9Ej1jwe/+YY+PKzv6xQvbea6+d9+t51rPnHHPMOceYa+051nfNMcdMVSFJkiRJ0jB41qALIEmSJElSvwxiJUmSJElDwyBWkiRJkjQ0DGIlSZIkSUPDIFaSJEmSNDQMYiVJkiRJQ8MgVvNCkluTvHHQ5dDMS/LpJB9p0/8uyR0zsI+7k7xpurcrSZqabfpgJflgkr9s0y9O8niS3aZ5H9ck+fXp3KZ2LQaxmvPGCyiSvDPJ10fnq+qVVXXNFNtZnKSS7D5DRdUsq6r/UVUvH3Q5JEn9sU0fLlX1/1XV86vqqUGXReplECtNExvS8U33r7eSJM20YWrTh6ms0nQxiNW80PvLbpIjkmxI8liSB5L8Scv2tfb3kdY15rVJnpXk95Lck+TBJBcn2btnu6e0ZQ8l+S9j9vPBJJcn+cskjwHvbPv+pySPJLkvyceT7NGzvUryniR3Jvl+kg8n+bkk/9jKe1lv/jF1fGeSryf5r0keTvLdJMf3LH9RkrVJtiYZSfLunmUfbNu+uO331iRLJzmek+ZP8r+0rkCPtGVv6Vn26STnJ7kyyQ+AX2zH7f9JcnOSHyS5KMkBSa5q2/9ykn17tvG5JPcneTTJ15K8coJyvjHJpp75DyTZ3LZ5R5KjW/qzkpyR5K72Xl6WZL+e9X61533+3YmOiyRp5tmm99Wmjy3rNUk+0vb9eJK/S/LCJJ9tZbk+yeKebZyb5N627IYk/26Ccm5zxbuVe2Or73eTvKMn768lub3VZ12Sl/QsOybJd1q7/nEgU30OpMkYxGo+Ohc4t6peAPwccFlLf0P7u0/rGvNPwDvb6xeBlwLPBz4OkOQQ4JPAO4ADgb2BhWP2tQK4HNgH+CzwFPCfgf2B1wJHA+8Zs85xwOHAMuD9wAXAfwQOAl4FvH2Suh0J3NG2/8fARUlGG4JLgE3Ai4CTgD9MclTPum9pefYB1o7WcxLj5k/ybODvgC8BPwu8F/hskt5uvb8CnA3sBYx2EfsPwDHAvwXeDFwF/A6wgO5c9J961r8KWNK2/026Yzuptv/Tgf+1qvaiO853t8XvBU4E/je64/Mw8Im23iHA+cCvtmUvBBZNtT9J0qywTR+/TR9bVoCT6dqyhXTH6p+AvwD2A24HzupZ/3rg0Lbsr4DPJXnuJGUlyfOA84DjWzv7C8BNbdkKujb939O16/8D+Ou2bH/gb4Dfa3W9C3jdZPuSpmIQq2Hxt+2X0EeSPELXEE3kX4GXJdm/qh6vqmsnyfsO4E+qamNVPQ6cCZzcfnE8Cfi7qvp6Vf0Y+H2gxqz/T1X1t1X1k6r6n1V1Q1VdW1VPVtXdwP9LFzj1+uOqeqyqbgVuAb7U9v8oXfD2mknKe09Vfardm7KGriE+IMlBdA3CB6rqR1V1E3AhcErPul+vqivbup8BXj3JfibLv4zui8E5VfXjqvoK8AW2baivqKp/aMflRy3tz6rqgaraTNe4XVdVN7bln++td1WtrqrvV9UTwAeBV/f+mj6Bp4DnAIckeXZV3V1Vd7VlvwH8blVt6tnmST3v8xeq6mtt2X8BfjLFviRJz5xtemdn2vRtytrS/qKq7urZ911V9eWqehL4HNu2s39ZVQ+1uv03uvaznzEmfgK8KsmeVXVfqzd07ewfVdXtbX9/CBzarsaeANxaVZdX1b8Cfwrc38e+pAkZ
xGpYnFhV+4y+2P6X0F6n0l3t+07rPvPLk+R9EXBPz/w9wO7AAW3ZvaMLquqHwENj1r+3dybJv03yhXRdYR+jO4nvP2adB3qm/+c488+fpLxPn/RbeWj5XwRsrarvj6nLwvHWBX4IPDfJ7kne0boePZ7kqqnyt33dW1W9gd7YfW1zXJq+6p1ktyTnpOv6+xg/vZo69jhuo6pGgN+iC1AfTHJJkhe1xS8BPt/zhel2uqB3vPf5B2z/PkuSpo9temdn2vRn3M4CJPnt1vX30dYu7s3U7ewPgP+DLmC9L8nfJ3lFW/wS4NyednYrXZfhhWx/7GuC8kt9M4jVvFNVd1bV2+m6on4UuLx1gRn7iyvAv9CdeEe9GHiS7sR/Hz3dSpPsSdfVdJvdjZk/H/gOsKR1ffodZue+j38B9kuyV0/ai4HNU61YVZ9tXbGeX1XHT5W/7eugJL3nj7H7Gu9Y9+tX6LpJvYmuUV3c0qc8jlX1V1X1err3tOjef+gay+N7vzRV1XPbVeH76Lp9dTtJfobt32dJ0gDYpj9t2trZdv/r+4G3Afu2HxIepb92dl1VHUN31fg7wKfaonuB/3NMO7tnVf0j27ez6Z2XngmDWM07Sf5jkgXtSuEjLfknwJb296U92f8a+M9JDk7yfLpfWS9tXWEuB96c5BfSDczwQaY+we8FPAY83n6d/M1pqtakqupe4B+BP0ry3CQ/T/fr9V/OwO6uo7sy+/4kz073LL83092/Mx32Ap6g+4X8Z+jekykleXmSo5I8B/gR3a/Oo1eL/xw4e3SQiSQL2v070L3Pv5zk9e19/hCeGyVpTrBNn5E2fS+64H4LsHuS3wdeMNVK6QZkXNF+RHgCeJxt29kz0wZiTLJ3kre2ZX8PvDLJv289uv4T8G+mqS7aRflFTfPRcuDWJI/TDQhxcru35Yd0gw39Q+vusgxYTXe/59eA79IFP+8FaPd5vJcuOLuP7mT9IN2JeyK/TXcl8ft0v05eOv3Vm9Db6a5a/gvdPaZnVdWXp3sn7V6iNwPHA9+ju5fplKr6zjTt4mK6blObgduAye5/6vUc4JxWpvvpfrU/sy07l25wqi8l+X7b5pHw9Pt8Gt3AFvfRDfq0CUnSXGCbPv1t+jrgi8A/07W3P6K/7r3PAv6vVqatdPcH/yZAVX2e7kr5Ja3r9S103xOoqu8Bb6Vrox+iG7jxH6apLtpFpeuWLmkq7VfdR+i6FX13wMWRJEnPkG26NNy8EitNIsmbk/xM6zrzX4Fv89OBhiRJ0pCwTZfmD4NYaXIr6LrN/Atd95eTy+4LkiQNI9t0aZ6wO7EkSZIkaWhMeSW2jfh5U8/rsSS/lWS/JOuT3Nn+7tvyJ8l5SUaS3JzksJ5trWz570yysif98CTfbuuc14beliRJkiRpG1MGsVV1R1UdWlWHAofTPVrj88AZwNVVtQS4us1DNxLZkvZaRfeMLZLsB5xFNyLoEcBZo4Fvy/PunvWWT0flJEmSJEnzy+47mP9o4K6quqc9Y/GNLX0NcA3wAbr7DS5u9xhcm2SfJAe2vOuraitAkvXA8iTXAC+oqmtb+sXAicBVkxVk//33r8WLF+9g8SVJ2t4NN9zwvapaMOhy7Igk+wAXAq8CCvg14A66x4Asphuw5m1V9XDr4XQucALdj9HvrKpvtu2sBH6vbfYjVbWmpR8OfBrYE7gSeN9U9w/aNkuSpstkbfOOBrEn0z1IGuCAqrqvTd8PHNCmF7Lts6Y2tbTJ0jeNkz6pxYsXs2HDhh0sviRJ20tyz6DL8AycC3yxqk5KsgfwM8Dv0PWSOifJGXS9pD7Atr2kjqTrAXVkTy+ppXSB8A1J1lbVw/y0l9R1dEHscqb4gdm2WZI0XSZrm/senbg1kG8BPjd2WftldsZHiEqyKsmGJBu2bNky07uTJGlOSrI38AbgIoCq+nFVPULXG2pNy7aGrmcT9PSSaj2fRntJHUfrJdUC19FeUgfSekm1Nv7inm1J
kjRQO/KIneOBb1bVA23+gdbI0f4+2NI3Awf1rLeopU2Wvmic9O1U1QVVtbSqli5YMFS9viRJmk4HA1uAv0hyY5IL27MvB9pLSpKk2bAjQezb+WlXYoC1wOgIwyuBK3rST2mjFC8DHm0N6jrg2CT7tgGdjgXWtWWPJVnW7tk5pWdbkiRpe7sDhwHnV9VrgB/w0wEWAXtJSZLmr76C2Pbr7jHA3/QknwMck+RO4E1tHrr7ZjYCI8CngPcAtAGdPgxc314fGh3kqeW5sK1zF1PccyNJ0i5uE7Cpqq5r85fTBbX2kpIkzXt9DexUVT8AXjgm7SG60YrH5i3gtAm2sxpYPU76BrrRFSVJ0hSq6v4k9yZ5eVXdQdce39ZeK+l+WB7bS+r0JJfQDez0aFXdl2Qd8Ic9j7w7Fjizqra258IvoxvY6RTgz2atgpIkTWJHRyeedxaf8fcTLrv7nF+axZJIkrRD3gt8tg28uBF4F10Pq8uSnArcA7yt5b2S7vE6I3SP2HkXdL2kkoz2koLte0l9mu4RO1cxi72kbJslSZPZ5YNYSZKGUVXdRPdonLHsJSVJmtd2ZGAnSZIkSZIGyiBWkiRJkjQ0DGIlSZIkSUPDIFaSJEmSNDQMYiVJkiRJQ8MgVpIkSZI0NAxiJUmSJElDwyBWkiRJkjQ0DGIlSZIkSUPDIFaSJEmSNDQMYiVJkiRJQ8MgVpIkSZI0NPoKYpPsk+TyJN9JcnuS1ybZL8n6JHe2v/u2vElyXpKRJDcnOaxnOytb/juTrOxJPzzJt9s65yXJ9FdVkiRJkjTs+r0Sey7wxap6BfBq4HbgDODqqloCXN3mAY4HlrTXKuB8gCT7AWcBRwJHAGeNBr4tz7t71lu+c9WSJEmSJM1HUwaxSfYG3gBcBFBVP66qR4AVwJqWbQ1wYpteAVxcnWuBfZIcCBwHrK+qrVX1MLAeWN6WvaCqrq2qAi7u2ZYkSZIkSU/r50rswcAW4C+S3JjkwiTPAw6oqvtanvuBA9r0QuDenvU3tbTJ0jeNky5JkiRJ0jb6CWJ3Bw4Dzq+q1wA/4KddhwFoV1Br+ou3rSSrkmxIsmHLli0zvTtJkiRJ0hzTTxC7CdhUVde1+cvpgtoHWldg2t8H2/LNwEE96y9qaZOlLxonfTtVdUFVLa2qpQsWLOij6JIkSZKk+WTKILaq7gfuTfLylnQ0cBuwFhgdYXglcEWbXguc0kYpXgY82rodrwOOTbJvG9DpWGBdW/ZYkmVtVOJTerYlSZLGkeTuNrL/TUk2tDSfHCBJmvd27zPfe4HPJtkD2Ai8iy4AvizJqcA9wNta3iuBE4AR4IctL1W1NcmHgetbvg9V1dY2/R7g08CewFXtJUmSJveLVfW9nvnRJweck+SMNv8Btn1ywJF0TwU4sufJAUvpbgu6IcnaNgDj6JMDrqNr25dj+yxJmgP6CmKr6ia6Bm6so8fJW8BpE2xnNbB6nPQNwKv6KYskSZrQCuCNbXoNcA1dEPv0kwOAa9vz3w9sedeP/qicZPTJAdfQnhzQ0kefHGAQK0kauH6fEytJkuaWAr6U5IYkq1rarD85wEEXJUmzrd/uxJIkaW55fVVtTvKzwPok3+ldWFWVZMafHFBVFwAXACxdunTG9ydJkldiJUkaQlW1uf19EPg8cAQDeHKAJEmzzSBWkqQhk+R5SfYanaYb8f8WfHKAJGkXYHdiSZKGzwHA59tTb3YH/qqqvpjkenxygCRpnjOIlSRpyFTVRuDV46Q/hE8OkCTNc3YnliRJkiQNDYNYSZIkSdLQMIiVJEmSJA0Ng1hJkiRJ0tAwiJUkSZIkDQ2DWEmSJEnS0DCIlSRJkiQNDYNYSZIkSdLQ6CuITXJ3km8nuSnJhpa2X5L1Se5sf/dt6UlyXpKRJDcnOaxnOytb/juTrOxJP7xtf6Stm+muqCRJkiRp+O3IldhfrKpDq2ppmz8DuLqqlgBXt3mA44El7bUKOB+6oBc4CzgSOAI4
azTwbXne3bPe8mdcI0mSJEnSvLUz3YlXAGva9BrgxJ70i6tzLbBPkgOB44D1VbW1qh4G1gPL27IXVNW1VVXAxT3bkiRJkiTpaf0GsQV8KckNSVa1tAOq6r42fT9wQJteCNzbs+6mljZZ+qZx0iVJkiRJ2sbufeZ7fVVtTvKzwPok3+ldWFWVpKa/eNtqAfQqgBe/+MUzvTtJkiRJ0hzT15XYqtrc/j4IfJ7untYHWldg2t8HW/bNwEE9qy9qaZOlLxonfbxyXFBVS6tq6YIFC/opuiRJkiRpHpkyiE3yvCR7jU4DxwK3AGuB0RGGVwJXtOm1wCltlOJlwKOt2/E64Ngk+7YBnY4F1rVljyVZ1kYlPqVnW5IkSZIkPa2f7sQHAJ9vT73ZHfirqvpikuuBy5KcCtwDvK3lvxI4ARgBfgi8C6Cqtib5MHB9y/ehqtrapt8DfBrYE7iqvSRJkiRJ2saUQWxVbQRePU76Q8DR46QXcNoE21oNrB4nfQPwqj7KK0mSJEnahe3MI3YkSdIAJdktyY1JvtDmD05yXZKRJJcm2aOlP6fNj7Tli3u2cWZLvyPJcT3py1vaSJIzttu5JEkDYhArSdLweh9we8/8R4GPVdXLgIeBU1v6qcDDLf1jLR9JDgFOBl4JLAc+2QLj3YBPAMcDhwBvb3klSRo4g1hJkoZQkkXALwEXtvkARwGXtyxrgBPb9Io2T1t+dMu/Arikqp6oqu/SjWdxRHuNVNXGqvoxcEnLK0nSwBnESpI0nP4UeD/wkzb/QuCRqnqyzW8CFrbphcC9AG35oy3/0+lj1pkoXZKkgTOIlSRpyCT5ZeDBqrphDpRlVZINSTZs2bJl0MWRJO0CDGIlSRo+rwPekuRuuq6+RwHnAvskGX3ywCJgc5veDBwE0JbvDTzUmz5mnYnSt1NVF1TV0qpaumDBgp2vmSRJUzCIlSRpyFTVmVW1qKoW0w3M9JWqegfwVeCklm0lcEWbXtvmacu/0h6JtxY4uY1efDCwBPgG3TPdl7TRjvdo+1g7C1WTJGlKUz4nVpIkDY0PAJck+QhwI3BRS78I+EySEWArXVBKVd2a5DLgNuBJ4LSqegogyenAOmA3YHVV3TqrNZEkaQIGsZIkDbGquga4pk1vpBtZeGyeHwFvnWD9s4Gzx0m/ErhyGosqSdK0sDuxJEmSJGloGMRKkiRJkoaGQawkSZIkaWgYxEqSJEmShoZBrCRJkiRpaPQdxCbZLcmNSb7Q5g9Ocl2SkSSXtufI0Z41d2lLvy7J4p5tnNnS70hyXE/68pY2kuSMaayfJEmSJGke2ZErse8Dbu+Z/yjwsap6GfAwcGpLPxV4uKV/rOUjySF0z6V7JbAc+GQLjHcDPgEcDxwCvL3llSRJkiRpG30FsUkWAb8EXNjmAxwFXN6yrAFObNMr2jxt+dEt/wrgkqp6oqq+C4zQPcvuCGCkqjZW1Y+BS1peSZIkSZK20e+V2D8F3g/8pM2/EHikqp5s85uAhW16IXAvQFv+aMv/dPqYdSZKlyRJkiRpG1MGsUl+GXiwqm6YhfJMVZZVSTYk2bBly5ZBF0eSJEmSNMv6uRL7OuAtSe6m6+p7FHAusE+S3VueRcDmNr0ZOAigLd8beKg3fcw6E6Vvp6ouqKqlVbV0wYIFfRRdkiRJkjSfTBnEVtWZVbWoqhbTDcz0lap6B/BV4KSWbSVwRZte2+Zpy79SVdXST26jFx8MLAG+AVwPLGmjHe/R9rF2WmonSZIkSZpXdp86y4Q+AFyS5CPAjcBFLf0i4DNJRoCtdEEpVXVrksuA24AngdOq6imAJKcD64DdgNVVdetOlEuSJEmSNE/tUBBbVdcA17TpjXQjC4/N8yPgrROsfzZw9jjpVwJX7khZJEmSJEm7nh15TqwkSZIkSQNlECtJkiRJGhoGsZIkSZKkoWEQK0mSJEkaGgaxkiQNmSTPTfKNJN9KcmuSP2jpBye5LslIkkvbo+toj7e7tKVf
l2Rxz7bObOl3JDmuJ315SxtJcsasV1KSpAkYxEqSNHyeAI6qqlcDhwLLkywDPgp8rKpeBjwMnNrynwo83NI/1vKR5BC6R+G9ElgOfDLJbkl2Az4BHA8cAry95ZUkaeAMYiVJGjLVebzNPru9CjgKuLylrwFObNMr2jxt+dFJ0tIvqaonquq7wAjd4/OOAEaqamNV/Ri4pOWVJGngDGIlSRpC7YrpTcCDwHrgLuCRqnqyZdkELGzTC4F7AdryR4EX9qaPWWei9PHKsSrJhiQbtmzZMg01kyRpcgaxkiQNoap6qqoOBRbRXTl9xYDKcUFVLa2qpQsWLBhEESRJuxiDWEmShlhVPQJ8FXgtsE+S3duiRcDmNr0ZOAigLd8beKg3fcw6E6VLkjRwBrGSJA2ZJAuS7NOm9wSOAW6nC2ZPatlWAle06bVtnrb8K1VVLf3kNnrxwcAS4BvA9cCSNtrxHnSDP62d8YpJktSH3afOIkmS5pgDgTVtFOFnAZdV1ReS3AZckuQjwI3ARS3/RcBnkowAW+mCUqrq1iSXAbcBTwKnVdVTAElOB9YBuwGrq+rW2aueJEkTM4iVJGnIVNXNwGvGSd9Id3/s2PQfAW+dYFtnA2ePk34lcOVOF1aSpGlmd2JJkiRJ0tCYMohN8twk30jyrSS3JvmDln5wkuuSjCS5tN0zQ7uv5tKWfl2SxT3bOrOl35HkuJ705S1tJMkZM1BPSZIkSdI80M+V2CeAo6rq1cChwPIky4CPAh+rqpcBDwOntvynAg+39I+1fCQ5hO4enFcCy4FPtmfc7QZ8AjgeOAR4e8srSZIkSdI2pgxiq/N4m312exVwFHB5S18DnNimV7R52vKjk6SlX1JVT1TVd4ERuvt2jgBGqmpjVf0YuKTllSRJkiRpG33dE9uumN4EPAisB+4CHqmqJ1uWTcDCNr0QuBegLX8UeGFv+ph1JkqXJEmSJGkbfQWxVfVUVR1K97DzI4BXzGShJpJkVZINSTZs2bJlEEWQJEmSJA3QDo1OXFWP0D1I/bXAPklGH9GzCNjcpjcDBwG05XsDD/Wmj1lnovTx9n9BVS2tqqULFizYkaJLkiRJkuaBfkYnXpBknza9J3AMcDtdMHtSy7YSuKJNr23ztOVfqapq6Se30YsPBpYA3wCuB5a00Y73oBv8ae001E2SJEmSNM/sPnUWDgTWtFGEnwVcVlVfSHIbcEmSjwA3Ahe1/BcBn0kyAmylC0qpqluTXAbcBjwJnFZVTwEkOR1YB+wGrK6qW6ethpIkSZKkeWPKILaqbgZeM076Rrr7Y8em/wh46wTbOhs4e5z0K4Er+yivJEmSJGkXtkP3xEqSJEmSNEgGsZIkSZKkoWEQK0mSJEkaGgaxkiRJkqShYRArSZIkSRoaBrGSJEmSpKFhECtJkiRJGhoGsZIkDZkkByX5apLbktya5H0tfb8k65Pc2f7u29KT5LwkI0luTnJYz7ZWtvx3JlnZk354km+3dc5LktmvqSRJ2zOIlSRp+DwJ/N9VdQiwDDgtySHAGcDVVbUEuLrNAxwPLGmvVcD50AW9wFnAkcARwFmjgW/L8+6e9ZbPQr0kSZqSQawkSUOmqu6rqm+26e8DtwMLgRXAmpZtDXBim14BXFyda4F9khwIHAesr6qtVfUwsB5Y3pa9oKquraoCLu7ZliRJA2UQK0nSEEuyGHgNcB1wQFXd1xbdDxzQphcC9/astqmlTZa+aZx0SZIGziBWkqQhleT5wH8HfquqHutd1q6g1iyUYVWSDUk2bNmyZaZ3J0mSQawkScMoybPpAtjPVtXftOQHWldg2t8HW/pm4KCe1Re1tMnSF42Tvp2quqCqllbV0gULFuxcpSRJ6oNBrCRJQ6aNFHwRcHtV/UnPorXA6AjDK4EretJPaaMULwMebd2O1wHHJtm3Deh0LLCuLXssybK2r1N6tiVJ0kBNGcQ6jL8kSXPO64BfBY5KclN7nQCcAxyT5E7gTW0e
4EpgIzACfAp4D0BVbQU+DFzfXh9qabQ8F7Z17gKumo2KSZI0ld37yDM6jP83k+wF3JBkPfBOumH8z0lyBt0w/h9g22H8j6Qbov/InmH8l9Ldo3NDkrVtNMTRYfyvo2tol2NjKUnSuKrq68BEP/gePU7+Ak6bYFurgdXjpG8AXrUTxZQkaUZMeSXWYfwlSZIkSXPFDt0T6zD+kiRJkqRB6juIdRh/SZIkSdKg9RXEOoy/JEmSJGku6Gd0YofxlyRJkiTNCf2MTjw6jP+3k9zU0n6Hbtj+y5KcCtwDvK0tuxI4gW5I/h8C74JuGP8ko8P4w/bD+H8a2JNuVGJHJpYkSZIkbWfKINZh/CVJkiRJc8UOjU4sSZIkSdIgGcRKkiRJkoaGQawkSZIkaWgYxEqSJEmShoZBrCRJkiRpaBjESpIkSZKGhkGsJEmSJGloGMRKkiRJkoaGQawkSZIkaWgYxEqSJEmShoZBrCRJkiRpaBjESpI0ZJKsTvJgklt60vZLsj7Jne3vvi09Sc5LMpLk5iSH9ayzsuW/M8nKnvTDk3y7rXNeksxuDSVJmphBrCRJw+fTwPIxaWcAV1fVEuDqNg9wPLCkvVYB50MX9AJnAUcCRwBnjQa+Lc+7e9Ybuy9JkgbGIFaSpCFTVV8Dto5JXgGsadNrgBN70i+uzrXAPkkOBI4D1lfV1qp6GFgPLG/LXlBV11ZVARf3bEuSpIGbMoi1y5IkSUPhgKq6r03fDxzQphcC9/bk29TSJkvfNE76uJKsSrIhyYYtW7bsXA0kSepDP1diP41dliRJGhrtCmrN0r4uqKqlVbV0wYIFs7FLSdIubsog1i5LkiQNhQdau0r7+2BL3wwc1JNvUUubLH3ROOmSJM0Jz/Se2IF0WZIkSRNaC4zerrMSuKIn/ZR2y88y4NHWhq8Djk2yb+sddSywri17LMmydovPKT3bkiRp4Hbf2Q1UVSWZlS5LSVbRdVPmxS9+8WzsUpKkOSfJXwNvBPZPsonulp1zgMuSnArcA7ytZb8SOAEYAX4IvAugqrYm+TBwfcv3oaoa7Xn1HrrbifYErmovSZLmhGcaxD6Q5MCqum8Huiy9cUz6Nexgl6WqugC4AGDp0qWzEjhLkjTXVNXbJ1h09Dh5Czhtgu2sBlaPk74BeNXOlHGmLD7j7yddfvc5vzRLJZEkDcoz7U5slyVJkiRJ0qyb8kqsXZYkSZIkSXPFlEGsXZYmZpclSZIkSZpdz7Q7sSRJkiRJs84gVpIkSZI0NHb6ETuSJElzxWS3AnkbkCTND16JlSRJkiQNDYNYSZIkSdLQMIiVJEmSJA0N74ndCd53I0mSJEmzyyBWkiRpCv5w3R+Pk7RrGPT/ukGspF3eZCdi8IuXJEnSXGIQK0mSdgn+YCUNj0H9vw76CqP6YxArSZLE1F+aZ2K7fimWpB1nEDsgz7ShtLGTtKsxAJDkeaA/HiftKgxiZ8hM/Zq7M/udbyevmarrrnQMJUmSpGFjECtJA+IPJtL8MKgfrjWzvIdamrvmTBCbZDlwLrAbcGFVnTPgIkmahAGYNP/ZNg+WQZQkjW9OBLFJdgM+ARwDbAKuT7K2qm4bbMnmnkEMOgE2lDPN4y9prrFtHm47833BNkfSXDcngljgCGCkqjYCJLkEWAHYUE6jQQXAk3F4dEmas2ybZ8HOtKHD1o3ZtlnSdJkrQexC4N6e+U3AkQMqi8YxbA2lJGmn2TZLkuakVNWgy0CSk4DlVfXrbf5XgSOr6vQx+VYBq9rsy4E7pmH3+wPfm4btzHXWc36xnvOL9Ry8l1TVgkEXYi4ZcNs81lz+7ExkGMsMw1nuYSwzDGe5h7HMMJzltsyTtM1z5UrsZuCgnvlFLW0bVXUBcMF07jjJhqpaOp3bnIus5/xiPecX66k5amBt81jD+NkZxjLDcJZ7GMsMw1nuYSwzDGe5LfPknjUbO+nD9cCSJAcn2QM4GVg74DJJkrQrs22WJM1Jc+JKbFU9
meR0YB3dMP6rq+rWARdLkqRdlm2zJGmumhNBLEBVXQlcOYBdz2gXqDnEes4v1nN+sZ6akwbYNo81jJ+dYSwzDGe5h7HMMJzlHsYyw3CW2zJPYk4M7CRJkiRJUj/myj2xkiRJkiRNaZcOYpMsT3JHkpEkZwy6PP1IcneSbye5KcmGlrZfkvVJ7mx/923pSXJeq9/NSQ7r2c7Klv/OJCt70g9v2x9p62aW6rU6yYNJbulJm/F6TbSPWa7nB5Nsbu/pTUlO6Fl2ZivzHUmO60kf97PbBmC5rqVf2gZjIclz2vxIW754hut5UJKvJrktya1J3tfS59V7Okk959V7muS5Sb6R5Futnn/wTMs2XfXX/Nbv/3GSp3r+zwYy6NREn92e5bN6/u1HH2V+Z5ItPcf21wdRzjFl2q79HLN8wnZkkPoo9xuTPNpzrH9/tss4TpnGbdvG5JlTx7vPMs/FYz1u+zomz5w6h/RZ5pk/h1TVLvmiG6TiLuClwB7At4BDBl2uPsp9N7D/mLQ/Bs5o02cAH23TJwBXAQGWAde19P2Aje3vvm1637bsGy1v2rrHz1K93gAcBtwym/WaaB+zXM8PAr89Tt5D2ufyOcDB7fO622SfXeAy4OQ2/efAb7bp9wB/3qZPBi6d4XoeCBzWpvcC/rnVZ169p5PUc169p+0YP79NPxu4rh37HSrbdNbf1/x+9ft/DDw+4HJO+V1iNv9Xp7HM7wQ+PujPwZgybdd+jlk+bjsy6Fcf5X4j8IVBl3NMmcZt2+by8e6zzHPxWI/bvo7JM9fOIf2UecbPIbvyldgjgJGq2lhVPwYuAVYMuEzP1ApgTZteA5zYk35xda4F9klyIHAcsL6qtlbVw8B6YHlb9oKqura6T+DFPduaUVX1NWDrmOTZqNdE+5gRE9RzIiuAS6rqiar6LjBC97kd97ObJMBRwOVt/bHHbLSelwNHt/wzoqruq6pvtunvA7cDC5ln7+kk9ZzIUL6n7X15vM0+u73qGZRtOuuv+W1Wz807oZ/vErN6/u3DUH7/6aP9nKgdGagdbPfnhD7btjl1vJ9BezwnTNK+9ppT55A+yzzjduUgdiFwb8/8Jobgw073IflSkhuSrGppB1TVfW36fuCANj1RHSdL3zRO+qDMRr0m2sdsO711x1nd021uR+v5QuCRqnpyTPo222rLH235Z1zr9vIaul/q5u17OqaeMM/e0yS7JbkJeJDux4S7nkHZprP+mt/6/T9+bpINSa5NcuLsFG0b/XyXGNj5dwL9fv/5D+0cdnmSg2anaDtlWL/XAby2dc28KskrB12YXuO0baPm7PGepMwwB4/12Pa1qiY81nPkHNJPmWGGzyG7chA7rF5fVYcBxwOnJXlD78J2VWreDTk9G/Ua4LE7H/g54FDgPuC/DaAMMyLJ84H/DvxWVT3Wu2w+vafj1HPevadV9VRVHQosoruS84rBlkjDLsmXk9wyzmubq4JT/B+/pKqWAr8C/GmSn5vpcu8i/g5YXFU/T/ej1Zop8uuZ+ybd5/jVwJ8BfzvY4vzUZG34XDVFmefksR7bviZ51YCLNKU+yjzj55BdOYjdDPT+KrCopc1pVbW5/X0Q+Dzdl8kHRrtwtL8PtuwT1XGy9EXjpA/KbNRron3Mmqp6oJ0MfgJ8iu49hR2v50N03Xl2H5O+zbba8r1b/hmT5Nl0Dclnq+pvWvK8e0/Hq+d8fU8BquoR4KvAa59B2aaz/hpyVfWmqnrVOK8r6PP/uKdN3AhcQ3f1ZTb1811iIP+rk5iyzFX1UFU90WYvBA6fpbLtjGH9XvfYaNfM6p7L/Owk+w+4WBO14b3m3PGeqsxz9ViP6mlfl49ZNNfOIU+bqMyzcQ7ZlYPY64El6Ua+3IPuRumBjGzYryTPS7LX6DRwLHALXblXtmwrgSva9FrglHSWAY+27lnrgGOT7Nu6OR4LrGvLHkuyrPW1P6VnW4MwG/WaaB+zZsw9JP873XsKXdlOTjcq
3cHAErrBjMb97LarFV8FTmrrjz1mo/U8CfhKyz9TdQpwEXB7Vf1Jz6J59Z5OVM/59p4mWZBknza9J3AM3f1GO1q26ay/5rcp/4/beeE5bXp/4HXAbbNWwk4/3yVm9fzbhynLPOYc9ha6//e5bqJ2ZE5L8m9aW0KSI+i+mw80QJmkDe81p453P2Weo8d6vPb1O2OyzalzSD9lnpVzSM2BkbkG9aIbWe2f6e7t+t1Bl6eP8r6UbhTBbwG3jpaZrl/81cCdwJeB/Vp6gE+0+n0bWNqzrV+jG1RlBHhXT/pSui/cdwEfBzJLdftrum6X/0p3X8Wps1GvifYxy/X8TKvHzXQnqgN78v9uK/Md9IwUPdFnt31GvtHq/zngOS39uW1+pC1/6QzX8/V03f9uBm5qrxPm23s6ST3n1XsK/DxwY6vPLcDvP9OyTVf9fc3v1yTniqXAhW36F9r/2bfa31MHVNbtPrvAh4C3tOlZPf9OU5n/iO57xrfofkh6xRwo83jt528Av9GWT9iOzPFyn95zrK8FfmEOlHmitm3OHu8+yzwXj/VE7eucPYf0WeYZP4eMfumTJEmSJGnO25W7E0uSJEmShoxBrCRJkiRpaBjESpIkSZKGhkGsJEmSJGloGMRKkiRJkoaGQawkSZIkaWgYxEqSJEmShoZBrCRJkiRpaPz/DqsKiRbfFSwAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 1152x216 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "fig, axs = plt.subplots(1, 2, figsize=(16,3))\n",
    "axs[0].hist(X.sample(frac=0.01).to_pandas(), bins=50)\n",
    "axs[0].set_title('Histogram non-normalised')\n",
    "axs[1].hist(X_norm.sample(frac=0.01).to_pandas(), bins=50)\n",
    "axs[1].set_title('Histogram normalised')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4.2.2 Log-based normalization"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Some features are not normally distributed in their raw form. If a feature has a long-tailed distribution, we can first apply the log function and then normalize the result.\n",
    "\n",
    "\\begin{equation} \\label{eq:log}\n",
    "\\begin{aligned}\n",
    "X_{log} &= \\log(X+1) \\\\\n",
    "X_{log\\text{-}norm} &= \\frac{X_{log} - mean_{X_{log}}}{\\sigma_{X_{log}}} \\sim \\mathcal{N}(0,\\,1)\n",
    "\\end{aligned}\n",
    "\\end{equation}\n",
    "\n",
    "User behavior data, such as the number of clicks or the number of purchases, often have a long-tailed distribution."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = df_train['CE_product_id'].to_pandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_log = np.log(X+1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_norm = (X_log-X_log.mean())/X_log.std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Text(0.5, 1.0, 'Histogram normalised')"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA7EAAADSCAYAAACcq0cwAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAm20lEQVR4nO3de9xdZX3n/c9XzlUUkJRiAINjqoO+xlMGsHX6WKmcrIZnRh2sU6LDmGkFp50+fRRqpzieip2ZWqyWPlRSobUiZUpJFcSI8nJsBQkVQU4lIAxJA0TCQWTEor/nj3VFd+7ch51k33vfe+fzfr32a691rdN1rbXv+1q/ta51rVQVkiRJkiSNg6eMOgOSJEmSJPXLIFaSJEmSNDYMYiVJkiRJY8MgVpIkSZI0NgxiJUmSJEljwyBWkiRJkjQ2DGI1EZLcnOSVo86H5l+STyR5fxv+V0lun4dt3J3kFwa9XknS3KzTRyvJe5L8eRs+LMljSXYb8DauTvIfBrlO7VoMYrXgTRdQJHlLkq9sGa+qF1TV1XOsZ0mSSrL7PGVVQ1ZV/6uqnjfqfEiS+mOdPl6q6n9X1dOq6gejzovUyyBWGhAr0ukN+uqtJEnzbZzq9HHKqzQoBrGaCL1XdpMcmWRtkkeT3J/k99tsX27fD7emMS9P8pQkv53kniQPJLkwyTN61ntKm/Zgkv8yZTvvSXJJkj9P8ijwlrbtryZ5OMnGJB9NsmfP+irJ25PckeQ7Sd6X5J8l+buW34t7559Sxrck+UqS/57koSTfSnJCz/RnJVmdZHOSdUne1jPtPW3dF7bt3pxk2Sz7c9b5k/zz1hTo4TbtdT3TPpHk3CSXJ/ku8PNtv/2/SW5M8t0k5yc5KMkVbf1fSLJ/zzr+Msl9SR5J8uUkL5ghn69Msr5n/F1JNrR13p7kmJb+lCRnJLmzHcuLkxzQs9wv9xznd8+0XyRJ8886va86fWper07y/rbtx5L8TZJnJvlky8t1SZb0rOOcJPe2adcn+Vcz5HOrO94t33e18n4ryZt75v33SW5t5bkyybN7pr06yW2tXv8okLl+B9JsDGI1ic4BzqmqpwP/DLi4pf9c+96vNY35KvCW9vl54DnA04CPAiQ5Avgj4M3AwcAzgMVTtrUcuATYD/gk8APgPwMHAi8HjgHePmWZ44CXAUcD7wTOA/4dcCjwQuBNs5TtKOD2tv7fA85PsqUiuAhYDzwLeD3wwSSv6ln2dW2e/YDVW8o5i2nnT7IH8DfA54GfBN4BfDJJb7PeXwI+AOwLbGki9m+AVwM/DbwWuAL4LWAR3f+i/9Sz/BXA0rb+v6fbt7Nq2z8d+JdVtS/dfr67TX4HcBLwf9Htn4eAj7XljgDOBX65TXsmcMhc25MkDYV1+vR1+tS8ApxMV5ctpttXXwX+FDgAuBU4q2f564AXt2l/Afxlkr1nyStJngp8BDih1bM/A9zQpi2nq9P/NV29/r+AT7VpBwJ/Bfx2K+udwM/Oti1pLgaxGhd/3a6EPpzkYbqKaCb/BDw3yYFV9VhVXTPLvG8Gfr+q7qqqx4AzgZPbFcfXA39TVV+pqu8DvwPUlOW/WlV/XVU/rKr/U1XXV9U1VfVkVd0N/H90gVOv36uqR6vqZuCbwOfb9h+hC95eMkt+76mqP2nPplxAVxEflORQugrhXVX1vaq6Afg4cErPsl+pqsvbsn8GvGiW7cw2/9F0JwZnV9X3q+qLwGfYuqK+rKr+tu2X77W0P6yq+6tqA13ldm1Vfb1Nv7S33FW1qqq+U1VPAO8BXtR7NX0GPwD2Ao5IskdV3V1Vd7ZpvwK8u6rW96zz9T3H+TNV9eU27b8AP5xjW5KkHWed3tmZOn2rvLa0P62qO3u2fWdVfaGqngT+kq3r2T+vqgdb2f4HXf3ZTx8TPwRemGSfqtrYyg1dPfu7VXVr294HgRe3u7EnAjdX1SVV9U/AHwD39bEtaUYGsRoX
J1XVfls+bHsltNepdHf7bmvNZ35xlnmfBdzTM34PsDtwUJt275YJVfU48OCU5e/tHUny00k+k64p7KN0/8QPnLLM/T3D/2ea8afNkt8f/dNv+aHN/yxgc1V9Z0pZFk+3LPA4sHeS3ZO8uTU9eizJFXPN37Z1b1X1BnpTt7XVfmn6KneS3ZKcna7p76P8+G7q1P24lapaB/w6XYD6QJKLkjyrTX42cGnPCdOtdEHvdMf5u2x7nCVJg2Od3tmZOn2H61mAJL/Zmv4+0urFZzB3Pftd4N/SBawbk3w2yfPb5GcD5/TUs5vpmgwvZtt9XzPkX+qbQawmTlXdUVVvomuK+iHgktYEZuoVV4B/pPvHu8VhwJN0//g30tOsNMk+dE1Nt9rclPFzgduApa3p028xnOc+/hE4IMm+PWmHARvmWrCqPtmaYj2tqk6Ya/62rUOT9P7/mLqt6fZ1v36JrpnUL9BVqkta+pz7sar+oqpeQXdMi+74Q1dZntB70lRVe7e7whvpmn11G0l+gm2PsyRpBKzTf2Rg9Wx7/vWdwBuB/duFhEfor569sqpeTXfX+DbgT9qke4H/OKWe3aeq/o5t69n0jks7wiBWEyfJv0uyqN0pfLgl/xDY1L6f0zP7p4D/nOTwJE+ju8r66dYU5hLgtUl+Jl3HDO9h7n/w+wKPAo+1q5O/OqBizaqq7gX+DvjdJHsn+Rd0V6//fB42dy3dndl3Jtkj3bv8Xkv3/M4g7As8QXeF/CfojsmckjwvyauS7AV8j+6q85a7xX8MfGBLJxNJFrXnd6A7zr+Y5BXtOL8X/zdK0oJgnT4vdfq+dMH9JmD3JL8DPH2uhdJ1yLi8XUR4AniMrevZM9M6YkzyjCRvaNM+C7wgyb9uLbr+E/BTAyqLdlGeqGkSHQ/cnOQxug4hTm7PtjxO19nQ37bmLkcDq+ie9/wy8C264OcdAO05j3fQBWcb6f5ZP0D3j3smv0l3J/E7dFcnPz344s3oTXR3Lf+R7hnTs6rqC4PeSHuW6LXACcC36Z5lOqWqbhvQJi6kaza1AbgFmO35p157AWe3PN1Hd9X+zDbtHLrOqT6f5DttnUfBj47zaXQdW2yk6/RpPZKkhcA6ffB1+pXA54B/oKtvv0d/zXufAvxGy9NmuueDfxWgqi6lu1N+UWt6/U268wSq6tvAG+jq6AfpOm782wGVRbuodM3SJc2lXdV9mK5Z0bdGnB1JkrSDrNOl8eadWGkWSV6b5Cda05n/DtzEjzsakiRJY8I6XZocBrHS7JbTNZv5R7rmLyeXzRckSRpH1unShLA5sSRJkiRpbHgnVpIkSZI0NgxiJUmSJEljY/dRZ2BHHXjggbVkyZJRZ0OSNAGuv/76b1fVolHnY9xZN0uSBmW2unlsg9glS5awdu3aUWdDkjQBktwz6jxMAutmSdKgzFY325xYkiRJkjQ2DGIlSZIkSWPDIFaSJEmSNDYMYiVJkiRJY8MgVpIkSZI0Nsa2d+JBWXLGZ2ecdvfZrxliTiRJkiRp4Rt1DOWdWEmSJEnS2OgriE2yX5JLktyW5NYkL09yQJI1Se5o3/u3eZPkI0nWJbkxyUt71rOizX9HkhU96S9LclNb5iNJMviiSpIkSZLGXb93Ys8BPldVzwdeBNwKnAFcVVVLgavaOMAJwNL2WQmcC5DkAOAs4CjgSOCsLYFvm+dtPcsdv3PFkiRJkiRNojmD2CTPAH4OOB+gqr5fVQ8Dy4EL2mwXACe14eXAhdW5BtgvycHAccCaqtpcVQ8Ba4Dj27SnV9U1VVXAhT3rkiRJkiTpR/q5E3s4sAn40yRfT/LxJE8FDqqqjW2e+4CD2vBi4N6e5de3tNnS10+Tvo0kK5OsTbJ206ZNfWRdkiRJkjRJ+glidwdeCpxbVS8BvsuPmw4D0O6g1uCzt7WqOq+qllXVskWLFs335iRJkiRJC0w/r9hZD6yvqmvb+CV0Qez9SQ6uqo2tSfADbfoG4NCe5Q9p
aRuAV05Jv7qlHzLN/JIkSZKkGYz6VTejMued2Kq6D7g3yfNa0jHALcBqYEsPwyuAy9rwauCU1kvx0cAjrdnxlcCxSfZvHTodC1zZpj2a5OjWK/EpPeuSJEmSJOlH+rkTC/AO4JNJ9gTuAt5KFwBfnORU4B7gjW3ey4ETgXXA421eqmpzkvcB17X53ltVm9vw24FPAPsAV7SPJEmSJElb6SuIraobgGXTTDpmmnkLOG2G9awCVk2TvhZ4YT95kSRJkiTtuvp9T6wkSZIkSSNnECtJkiRJGhsGsZIkSZKksWEQK0mSJEkaGwaxkiRJkqSxYRArSdIESXJ3kpuS3JBkbUs7IMmaJHe07/1bepJ8JMm6JDcmeWnPela0+e9IsmKm7UmSNGwGsZIkTZ6fr6oXV9WW1+OdAVxVVUuBq9o4wAnA0vZZCZwLXdALnAUcBRwJnLUl8JUkadQMYiVJmnzLgQva8AXAST3pF1bnGmC/JAcDxwFrqmpzVT0ErAGOH3KeJUmalkGsJEmTpYDPJ7k+ycqWdlBVbWzD9wEHteHFwL09y65vaTOlS5I0cruPOgOSJGmgXlFVG5L8JLAmyW29E6uqktSgNtYC5ZUAhx122KBWK0nSjLwTK0nSBKmqDe37AeBSumda72/NhGnfD7TZNwCH9ix+SEubKX267Z1XVcuqatmiRYsGWRRJkqZlECtJ0oRI8tQk+24ZBo4FvgmsBrb0MLwCuKwNrwZOab0UHw080podXwkcm2T/1qHTsS1NkqSRszmxJEmT4yDg0iTQ1fF/UVWfS3IdcHGSU4F7gDe2+S8HTgTWAY8DbwWoqs1J3gdc1+Z7b1VtHl4xJEmamUGsJEkToqruAl40TfqDwDHTpBdw2gzrWgWsGnQeJUnaWTYnliRJkiSNDYNYSZIkSdLY6CuITXJ3kpuS3JBkbUs7IMmaJHe07/1bepJ8JMm6JDcmeWnPela0+e9IsqIn/WVt/evashl0QSVJkiRJ42977sT+fFW9uKqWtfEzgKuqailwVRsHOAFY2j4rgXOhC3qBs4Cj6Lr7P2tL4NvmeVvPcsfvcIkkSZIkSRNrZ5oTLwcuaMMXACf1pF9YnWuA/do76Y4D1lTV5qp6CFgDHN+mPb2qrmkdTFzYsy5JkiRJkn6k3yC2gM8nuT7JypZ2UHuXHMB9dN36AywG7u1Zdn1Lmy19/TTp20iyMsnaJGs3bdrUZ9YlSZIkSZOi31fsvKKqNiT5SWBNktt6J1ZVJanBZ29rVXUecB7AsmXL5n17kiRJkqSFpa87sVW1oX0/AFxK90zr/a0pMO37gTb7BuDQnsUPaWmzpR8yTbokSZIkSVuZ805skqcCT6mq77ThY4H3AquBFcDZ7fuytshq4PQkF9F14vRIVW1MciXwwZ7OnI4FzqyqzUkeTXI0cC1wCvCHgyuiJEmSJKlfS8747KizMKt+mhMfBFza3nqzO/AXVfW5JNcBFyc5FbgHeGOb/3LgRGAd8DjwVoAWrL4PuK7N996q2tyG3w58AtgHuKJ9JEmSJEnaypxBbFXdBbxomvQHgWOmSS/gtBnWtQpYNU36WuCFfeRXkiRJkrQL25lX7EiSJEmSNFQGsZIkSZKksWEQK0mSJEkaGwaxkiRJkqSx0U/vxJIkSZI08WZ7tczdZ79miDnRbLwTK0mSJEkaGwaxkiRJkqSxYRArSdKESbJbkq8n+UwbPzzJtUnWJfl0kj1b+l5tfF2bvqRnHWe29NuTHDeiokiStA2DWEmSJs+vAbf2jH8I+HBVPRd4CDi1pZ8KPNTSP9zmI8kRwMnAC4DjgT9KstuQ8i5J0qwMYiVJmiBJDgFeA3y8jQd4FXBJm+UC4KQ2vLyN06Yf0+ZfDlxUVU9U1beAdcCRQymAJElzMIiVJGmy/AHwTuCHbfyZwMNV9WQbXw8sbsOLgXsB2vRH2vw/Sp9mGUmSRsogVpKkCZHkF4EHqur6IW5zZZK1SdZu2rRpWJuVJO3C
DGIlSZocPwu8LsndwEV0zYjPAfZLsuXd8IcAG9rwBuBQgDb9GcCDvenTLLOVqjqvqpZV1bJFixYNtjSSJE3DIFaSpAlRVWdW1SFVtYSuY6YvVtWbgS8Br2+zrQAua8Or2zht+herqlr6ya334sOBpcDXhlQMSZJmtfvcs0iSpDH3LuCiJO8Hvg6c39LPB/4syTpgM13gS1XdnORi4BbgSeC0qvrB8LMtSdK2DGIlSZpAVXU1cHUbvotpeheuqu8Bb5hh+Q8AH5i/HEqStGP6bk48ny9OT3J8S1uX5IwBlk+SJEmSNEG255nYeXlxent5+seAE4AjgDe1eSVJkiRJ2kpfQew8vzj9SGBdVd1VVd+n601x+U6WS5IkSZI0gfq9E/sHzN+L0/t+obrvopMkSZKkXducHTv1vjg9ySvnPUezqKrzgPMAli1bVqPMiyRJkqRdx5IzPjvr9LvPfs2QcqJ+eife8uL0E4G9gafT8+L0drd1uhenr9+OF6f39UJ1SZIkSdKubc7mxEN4cfp1wNLW2/GebRurB1I6SZIkSdJE2Zn3xA7sxelJTgeuBHYDVlXVzTuRL0mSJEnShNquIHa+XpxeVZcDl29PXiRJkiRJu57teU+sJEmSJEkjZRArSZIkSRobBrGSJEmSpLGxMx07SZIkSdKCMtv7XH2X62QwiJUkSZI0dAab2lE2J5YkSZIkjQ2DWEmSJEnS2DCIlSRJkiSNDYNYSZIkSdLYMIiVJEmSJI0Ng1hJkiZEkr2TfC3JN5LcnOS/tvTDk1ybZF2STyfZs6Xv1cbXtelLetZ1Zku/PclxIyqSJEnb8BU7kiRNjieAV1XVY0n2AL6S5ArgN4APV9VFSf4YOBU4t30/VFXPTXIy8CHg3yY5AjgZeAHwLOALSX66qn4wikJJWrh8TY5GwSBWkqQJUVUFPNZG92ifAl4F/FJLvwB4D10Qu7wNA1wCfDRJWvpFVfUE8K0k64Ajga/OfykkafIY7A+WQawkSRMkyW7A9cBzgY8BdwIPV9WTbZb1wOI2vBi4F6CqnkzyCPDMln5Nz2p7l5m6vZXASoDDDjtsoGWRNDgGUfNvtn2swfKZWEmSJkhV/aCqXgwcQnf39PnzvL3zqmpZVS1btGjRfG5KkiTAIFaSpIlUVQ8DXwJeDuyXZEvrq0OADW14A3AoQJv+DODB3vRplpEkaaTmDGKH0dNhkuNb2rokZ8xDOSVJmnhJFiXZrw3vA7wauJUumH19m20FcFkbXt3GadO/2J6rXQ2c3Or0w4GlwNeGUghJkubQz53YLT0dvgh4MXB8kqPpejD8cFU9F3iIrodD6OnpEPhwm48pPR0eD/xRkt3aszsfA04AjgDe1OaVJEnb52DgS0luBK4D1lTVZ4B3Ab/ROmh6JnB+m/984Jkt/TeAMwCq6mbgYuAW4HPAafZMLElaKObs2GkIPR0CrKuquwCSXNTmvWVnCiZJ0q6mqm4EXjJN+l38uM7tTf8e8IYZ1vUB4AODzqM0KXa0Ex87UZJ2Xl+9Ew+hp8N7p6QfNUM+7AFRkiRJY2uu4NcgV5pbX0Fsa0L04vaczaXMc0+Hs+TjPOA8gGXLltUo8iBJkiRpbr5yRvNlu94TW1UPJ9mqp8N2N3a6ng7Xb0dPh/aAKEmSJM3Cu7g/tqMBsoH1ZOind+L57unwOmBp6+14T7rOn1YPoGySJEmSpAnTz53Yg4EL2nOxTwEurqrPJLkFuCjJ+4Gvs3VPh3/WOm7aTBeUUlU3J9nS0+GT9PR0mOR04EpgN2BV6xVRkiRJkqSt9NM78bz3dFhVlwOX95FfSZIkSZoYNhPfftv1TKwkSZKk+eMzm9LcDGIlSZI0sXal97lOUgA8SWXR4BnESpIkaWzNV7CzKwVRNmfVuJmzd2JJkiRJkhYKg1hJkiRJ0tgwiJUkSZIkjQ2DWEmSJEnS2LBjJ0mSJEkz2pU6udJ4MIiVJEnSgmYQJamXzYklSZIkSWPDIFaS
JEmSNDYMYiVJkiRJY8NnYiVJmhBJDgUuBA4CCjivqs5JcgDwaWAJcDfwxqp6KEmAc4ATgceBt1TV37d1rQB+u636/VV1wTDLosnjc62SBsU7sZIkTY4ngf+nqo4AjgZOS3IEcAZwVVUtBa5q4wAnAEvbZyVwLkALes8CjgKOBM5Ksv8wCyJJ0kwMYiVJmhBVtXHLndSq+g5wK7AYWA5suZN6AXBSG14OXFida4D9khwMHAesqarNVfUQsAY4fnglkSRpZgaxkiRNoCRLgJcA1wIHVdXGNuk+uubG0AW49/Ystr6lzZQ+3XZWJlmbZO2mTZsGVwBJkmYw5zOxw3i+JsnLgE8A+wCXA79WVTWgMkqStEtJ8jTgfwK/XlWPdlVzp6oqycDq2Ko6DzgPYNmyZdbduzife5U0DP3ciR3G8zXnAm/rWc4mS5Ik7YAke9AFsJ+sqr9qyfe3ZsK07wda+gbg0J7FD2lpM6VLkjRycwax8/18TZv29Kq6pt19vbBnXZIkqU+tNdT5wK1V9fs9k1YDK9rwCuCynvRT0jkaeKQ1O74SODbJ/u2C87EtTZKkkduuV+zM0/M1i9vw1HRJkrR9fhb4ZeCmJDe0tN8CzgYuTnIqcA/wxjbtcrrHf9bRPQL0VoCq2pzkfcB1bb73VtXmoZRAkqQ59B3EDvP5mlnysJKuiTKHHXbYfG9OkqSxUlVfATLD5GOmmb+A02ZY1ypg1eByJ0nSYPQVxM72fE1VbdyO52teOSX96pZ+yDTzb8POIyRJkiTtSuwwbVv99E481/M1Z7Pt8zWnJ7mIrhOnR1qgeyXwwZ7OnI4FzmxNlh5tz+JcC5wC/OEAyiZJkqTtNNsJ891nv2aIOZGk6fVzJ3YYz9e8nR+/YueK9pEkSdI82NE7O94RkibHOP89zxnEDuP5mqpaC7xwrrxIkiRJknZt/bwnVpIkSZKkBcEgVpIkSZI0NgxiJUmSJEljwyBWkiRJkjQ2+npPrCRJksbHOPc6Kklz8U6sJEmSJGlseCdWkiRJkibMJLfI8E6sJEmSJGlsGMRKkiRJksaGzYklSZLG0CQ3FZSk2XgnVpIkSZI0NgxiJUmSJEljwyBWkiRJkjQ2DGIlSZIkSWPDIFaSpAmSZFWSB5J8syftgCRrktzRvvdv6UnykSTrktyY5KU9y6xo89+RZMUoyiJJ0nTsnViSpMnyCeCjwIU9aWcAV1XV2UnOaOPvAk4AlrbPUcC5wFFJDgDOApYBBVyfZHVVPTS0UuwiZuth+O6zXzPEnEjS+JjzTux8X9FN8rIkN7VlPpIkgy6kJEm7iqr6MrB5SvJy4II2fAFwUk/6hdW5BtgvycHAccCaqtrcAtc1wPHznnlJkvrQz53YTzC/V3TPBd4GXAtcTldJXrHzRZMkSc1BVbWxDd8HHNSGFwP39sy3vqXNlK4h8j2wkjS9Oe/EzucV3Tbt6VV1TVUVXaB8EpIkaV60+rYGtb4kK5OsTbJ206ZNg1qtJEkz2tGOnQZ1RXdxG56aLkmSBuf+duGY9v1AS98AHNoz3yEtbab0bVTVeVW1rKqWLVq0aOAZlyRpqp3u2KmqKsnArujOJslKYCXAYYcdNoxNSpI0CVYDK4Cz2/dlPemnJ7mI7jGgR6pqY5IrgQ9u6fMCOBY4c8h5nhg2C5akwdrRO7GDuqK7oQ1PTZ+WV3slSZpdkk8BXwWel2R9klPpgtdXJ7kD+IU2Dl1fFHcB64A/Ad4OUFWbgfcB17XPe1uaJEkjt6N3YgdyRbeqNid5NMnRdB07nQL84Q7mSZKkXV5VvWmGScdMM28Bp82wnlXAqgFmTZKkgZgziG1XdF8JHJhkPV0vw2cDF7eru/cAb2yzXw6cSHdF93HgrdBd0U2y5YoubH1F9+10PSDvQ9cr8YLpmXiu5j++v02SJEmShmvOIHa+r+hW1VrghXPlQ5IkSZKkne7YSZIkadLZOZMkLRw72rGTJEmSJElDZxArSZIkSRob
BrGSJEmSpLFhECtJkiRJGht27LQTZuvkwdfvSJIkSdLgeSdWkiRJkjQ2DGIlSZIkSWPD5sSSdnlzvf/RxwOkXYPvgpWk8WAQK0mSxoaBpiTJ5sSSJEmSpLHhndgRsWdjSZKm591WSdJsDGLnyc5UwAa4/XE/SbsG/9YlSVIvg9gJ48meND78e5UkSdp+BrHSFAYWkiRJ0sJlECvAV4xo+xnsS5IkaRQWTBCb5HjgHGA34ONVdfaIsyQNjRcRJC1E1s2SpIVoQQSxSXYDPga8GlgPXJdkdVXdMtqcLTz22Ngf7xJK0s6xbpYkLVQLIogFjgTWVdVdAEkuApYDVpQDNF89Ju8MA0pJWrCsmyVJC9JTRp2BZjFwb8/4+pYmSZJGw7pZkrQgLZQ7sX1JshJY2UYfS3L7AFZ7IPDtAaxnoVuQ5cyHBr7KOcu5M9uch/zu6HYX5PHcYoD7aUGUcwjHfZtyjuq3Ns8GfjwHuJ+ePbA17WIGWDcviL/3HWC+h29c826+h8t8D9eP8j2MunmhBLEbgEN7xg9paVupqvOA8wa54SRrq2rZINe5EFnOyWI5J4vl1AI11Lp5XH8f5nv4xjXv5nu4zPdwDTvfC6U58XXA0iSHJ9kTOBlYPeI8SZK0K7NuliQtSAviTmxVPZnkdOBKum78V1XVzSPOliRJuyzrZknSQrUggliAqrocuHwEmx5o8+QFzHJOFss5WSynFqQh183j+vsw38M3rnk338NlvodrqPlOVQ1ze5IkSZIk7bCF8kysJEmSJElz2qWD2CTHJ7k9ybokZ4w6P/1IcneSm5LckGRtSzsgyZokd7Tv/Vt6knykle/GJC/tWc+KNv8dSVb0pL+srX9dWzZDKteqJA8k+WZP2ryXa6ZtDLmc70myoR3TG5Kc2DPtzJbn25Mc15M+7W+3dcBybUv/dOuMhSR7tfF1bfqSeS7noUm+lOSWJDcn+bWWPlHHdJZyTtQxTbJ3kq8l+UYr53/d0bwNqvwaf0n+W5Lb2t/8pUn2m2G+beq9UdqOfC+oc4wkb2h/vz9MMmMPogttf8N25X2h7fO+6qMkP+ipL0bSedpc+26Ydc726CPfb0myqWf//odR5HOqTHM+OGV6MsN50Sj1ke9XJnmkZ3//zrxlpqp2yQ9dJxV3As8B9gS+ARwx6nz1ke+7gQOnpP0ecEYbPgP4UBs+EbgCCHA0cG1LPwC4q33v34b3b9O+1uZNW/aEIZXr54CXAt8cZrlm2saQy/ke4DenmfeI9rvcCzi8/V53m+23C1wMnNyG/xj41Tb8duCP2/DJwKfnuZwHAy9tw/sC/9DKM1HHdJZyTtQxbfv4aW14D+Datu+3K2+DLL+f8f8AxwK7t+EPzfS3yjT13kLP92y/6RHm+58DzwOuBpbNMt+C2t/95n2B7vO+6iPgsRHnc859N8w6Z8D5fgvw0VHndZq8b3M+OGX6tOdFo/70ke9XAp8ZRl525TuxRwLrququqvo+cBGwfMR52lHLgQva8AXAST3pF1bnGmC/JAcDxwFrqmpzVT0ErAGOb9OeXlXXVPdLvLBnXfOqqr4MbJ6SPIxyzbSNeTFDOWeyHLioqp6oqm8B6+h+t9P+dpMEeBVwSVt+6j7bUs5LgGPa/POiqjZW1d+34e8AtwKLmbBjOks5ZzKWx7Qdl8fa6B7tUzuQt0GWX2Ouqj5fVU+20Wvo3kO74PWZ7wV3jlFVt1bV7aPMw47qM+8Lbp8z5HOMndDPvhvqeUSfFuIx70sf54MznReN1Haex86rXTmIXQzc2zO+ntlPPheKAj6f5PokK1vaQVW1sQ3fBxzUhmcq42zp66dJH5VhlGumbQzb6a25yKqe5kbbW85nAg/3nFz1lvNHy7Tpj7T5511rcvQSurt3E3tMp5QTJuyYJtktyQ3AA3QXE+7cgbwNsvyaLP+e7q7DdKar9xaKmfI9rucYsLD392wW4j7vtz7aO8naJNckOWk4WdtKP/tuZOcR
s+j3mP+bVh9fkuTQ4WRtpy3E33O/Xp7u8aMrkrxgvjayYF6xo769oqo2JPlJYE2S23onVlUlmbgup4dRrhHuu3OB99GdOLwP+B90J0ZjL8nTgP8J/HpVPdp70XaSjuk05Zy4Y1pVPwBe3J7/uxR4/mhzpHGQ5AvAT00z6d1VdVmb593Ak8AnZ1jNNvVeuxswbwaU76HrJ999GPr+hoHlfehmy3fvyBz10bPbPn8O8MUkN1XVnYPO6y7qb4BPVdUTSf4j3d3kV404T5Ps7+l+z4+l6w/kr4Gl87GhXTmI3QD0Xo05pKUtaFW1oX0/kORSuqYU9yc5uKo2tqYGD7TZZyrjBro2673pV7f0Q6aZf1SGUa6ZtjE0VXX/luEkfwJ8po3O9hudLv1BuuYmu7erpL3zb1nX+iS7A89o88+bJHvQBXafrKq/askTd0ynK+ekHlOAqno4yZeAl+9A3gZZfo2BqvqF2aYneQvwi8Ax7dGA6dYxXb03r0HVAPI9knOMufLd5zqGvr/b9nY27wtunyfpqz7q2ed3JbmarlXPMIPYfvbdSOqcOcyZ76rqzePH6Z5THgfjGqc82jN8eZI/SnJgVX170NvalZsTXwcsTdfz5Z50D6mPpEe4fiV5apJ9twzTdS7xTbp8b+m1dQWw5YrlauCUdI4GHmnNWq4Ejk2yf2vmeCxwZZv2aJKj23MOp/SsaxSGUa6ZtjE0U55x+L/pjil0eTs5XY+Ah9NdyfoaM/x224nUl4DXt+Wn7rMt5Xw98MWZThgHVKYA5wO3VtXv90yaqGM6Uzkn7ZgmWdTuwJJkH+DVdM//bm/eBll+jbkkxwPvBF5XVY/PMM9M9d7I9JNvxvAcAxbm/t4OC3Gfz1kftfptrzZ8IPCzwC1Dy2Gnn3031POIPs2Z7yn18evo6q5xMNN50YKW5KfauRFJjqSLNefnYkctgJ6uRvWh6/nrH+iudr171PnpI7/Poet57RvAzVvyTPdMwlXAHcAXgANaeoCPtfLdRE+PfnRNG9e1z1t70pfRVVh3Ah8FMqSyfQrYCPwTXbv/U4dRrpm2MeRy/lkrx410/7QO7pn/3S3Pt9PTU/RMv932G/laK/9fAnu19L3b+Lo2/TnzXM5X0DWlvRG4oX1OnLRjOks5J+qYAv8C+HorzzeB39nRvA2q/H7G/9OO6b09fztbej59FnB5z/Hfpt5b6Plu4wvqHIPugtp64AngfroLggt+f/eb9wW6z2eq85YBH2/DP0NXX3yjfZ86orxus++A99JdrBlqnTPgfP9u+y1/g+6i6PNHneeWr+nOB38F+JU2fcbzogWe79N79vc1wM/MV162nPRJkiRJkrTg7crNiSVJkiRJY8YgVpIkSZI0NgxiJUmSJEljwyBWkiRJkjQ2DGIlSZIkSWPDIFaSJEmSNDYMYiVJkiRJY8MgVpIkSZI0Nv5/oxssagwl6KUAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 1152x216 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "fig, axs = plt.subplots(1, 2, figsize=(16,3))\n",
    "axs[0].hist(X.sample(frac=0.01), bins=50)\n",
    "axs[0].set_title('Histogram non-normalised')\n",
    "axs[1].hist(X_norm.sample(frac=0.01), bins=50)\n",
    "axs[1].set_title('Histogram normalised')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4.2.3 Scale to 0-1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Another technique is to scale the numerical features to the range between 0 and 1 (min-max scaling).\n",
    "\n",
    "\\begin{equation} \\label{eq:01}\n",
    "X_{norm} = \\frac{X - \\min(X)}{\\max(X) - \\min(X)}\n",
    "\\end{equation}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = df_train['TE_cat_2']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([8.0000e+00, 8.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,\n",
       "        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00,\n",
       "        3.2000e+01, 9.2000e+01, 1.5600e+02, 1.4000e+01, 4.9000e+01,\n",
       "        3.4800e+02, 9.1000e+01, 1.0510e+03, 1.1800e+02, 1.7700e+02,\n",
       "        1.4710e+03, 1.1560e+03, 3.5200e+02, 7.2900e+02, 3.4340e+03,\n",
       "        4.7980e+04, 1.9500e+03, 1.0770e+03, 4.4340e+03, 3.4220e+03,\n",
       "        2.9400e+02, 3.6550e+03, 0.0000e+00, 3.5900e+02, 1.6090e+03,\n",
       "        2.5380e+03, 9.9100e+02, 2.3800e+02, 0.0000e+00, 0.0000e+00,\n",
       "        0.0000e+00, 2.7500e+02, 0.0000e+00, 3.4905e+04, 8.4600e+02,\n",
       "        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 7.5400e+02]),\n",
       " array([0.  , 0.02, 0.04, 0.06, 0.08, 0.1 , 0.12, 0.14, 0.16, 0.18, 0.2 ,\n",
       "        0.22, 0.24, 0.26, 0.28, 0.3 , 0.32, 0.34, 0.36, 0.38, 0.4 , 0.42,\n",
       "        0.44, 0.46, 0.48, 0.5 , 0.52, 0.54, 0.56, 0.58, 0.6 , 0.62, 0.64,\n",
       "        0.66, 0.68, 0.7 , 0.72, 0.74, 0.76, 0.78, 0.8 , 0.82, 0.84, 0.86,\n",
       "        0.88, 0.9 , 0.92, 0.94, 0.96, 0.98, 1.  ]),\n",
       " <BarContainer object of 50 artists>)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD7CAYAAACIYvgKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAARNElEQVR4nO3dfazeZX3H8ffHVpRNEbRHQtriYbFuqyxTPIEal01hlgILJRkSyJRKOpoILG4zm3X7gw0kwSyTSYIPnTQUMwXm5mi0rGt4CNmyYg9DgcIYRwRph7ZaKFuIOPC7P+6relvPw92ec+7z0PcruXN+v+91/e7fdZ1zej737+G+m6pCknRke8VMD0CSNPMMA0mSYSBJMgwkSRgGkiQMA0kSPYZBkieTPJTkG0mGW+31SbYlebx9Pa7Vk+T6JCNJHkxyStfzrGn9H0+ypqv+jvb8I23bTPVEJUljO5Qjg/dU1duqaqitrwfurKplwJ1tHeAsYFl7rAM+A53wAK4ETgNOBa48ECCtz6Vd26067BlJkg7Zwklsuxp4d1veBNwDfLTVb67Ou9m2Jzk2yQmt77aq2geQZBuwKsk9wDFVtb3VbwbOA+4Yb+eLFi2qwcHBSQxfko4s999///eramC0tl7DoIB/SVLA56pqA3B8VT3T2r8LHN+WFwNPd227q9XGq+8apT6uwcFBhoeHexy+JCnJU2O19RoGv1FVu5O8EdiW5D+7G6uqWlBMqyTr6Jx64sQTT5zu3UnSEaOnawZVtbt93QN8hc45/++10z+0r3ta993A0q7Nl7TaePUlo9RHG8eGqhqqqqGBgVGPdCRJh2HCMEjyi0lee2AZWAk8DGwGDtwRtAa4vS1vBi5udxWtAPa300lbgZVJjmsXjlcCW1vb80lWtLuILu56LklSH/Rymuh44Cvtbs+FwBer6p+T7ABuS7IWeAq4oPXfApwNjAAvAJcAVNW+JFcDO1q/qw5cTAYuA24CjqZz4Xjci8eSpKmVufoR1kNDQ+UFZEnqXZL7u94e8DN8B7IkyTCQJBkGkiQMA0kSk/s4CkmjGFz/tVHrT157Tp9HIvXOIwNJkmEgSTIMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEocQBkkWJHkgyVfb+klJ7ksykuTWJEe1+qva+khrH+x6jo+1+mNJzuyqr2q1kSTrp3B+kqQeHMqRwYeBR7vWPwFcV1VvBp4F1rb6WuDZVr+u9SPJcuBC4K3AKuDTLWAWADcAZwHLgYtaX0lSn/QUBkmWAOcAn2/rAU4Hvty6bALOa8ur2zqt/YzWfzVwS1W9WFXfBkaAU9tjpKqeqKofAbe0vpKkPun1yOBvgD8FftzW3wA8V1UvtfVdwOK2vBh4GqC172/9f1I/aJux6pKkPpkwDJL8DrCnqu7vw3gmGsu6JMNJhvfu3TvTw5GkeaOXI4N3AecmeZLOKZzTgU8BxyZZ2PosAXa35d3AUoDW/jrgB931g7YZq/5zqmpDVQ1V1dDAwEAPQ5ck9WLCMKiqj1XVkqoapHMB+K6q+j3gbuD81m0NcHtb3tzWae13VVW1+oXtbqOTgGXA14EdwLJ2d9JRbR+bp2R2kqSeLJy4y5g+CtyS5OPAA8CNrX4j8IUkI8A+On/cqaqdSW4DHgFeAi6vqpcBklwBbAUWABurauckxiVJOkSHFAZVdQ9wT1t+gs6dQAf3+SHwvjG2vwa4ZpT6FmDLoYxFkjR1fAeyJMkwkCQZBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSmNx/eylJ88bg+q+NWn/y2nP6PJKZ4ZGBJMkwkCQZBpIkDANJ
EoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiS6CEMkrw6ydeTfDPJziR/2eonJbkvyUiSW5Mc1eqvausjrX2w67k+1uqPJTmzq76q1UaSrJ+GeUqSxtHLkcGLwOlV9evA24BVSVYAnwCuq6o3A88Ca1v/tcCzrX5d60eS5cCFwFuBVcCnkyxIsgC4ATgLWA5c1PpKkvpkwjCojv9tq69sjwJOB77c6puA89ry6rZOaz8jSVr9lqp6saq+DYwAp7bHSFU9UVU/Am5pfSVJfdLTNYP2Cv4bwB5gG/At4Lmqeql12QUsbsuLgacBWvt+4A3d9YO2Gas+2jjWJRlOMrx3795ehi5J6kFPYVBVL1fV24AldF7J/8p0DmqccWyoqqGqGhoYGJiJIUjSvHRIdxNV1XPA3cA7gWOTLGxNS4DdbXk3sBSgtb8O+EF3/aBtxqpLkvqkl7uJBpIc25aPBt4LPEonFM5v3dYAt7flzW2d1n5XVVWrX9juNjoJWAZ8HdgBLGt3Jx1F5yLz5imYmySpRwsn7sIJwKZ2188rgNuq6qtJHgFuSfJx4AHgxtb/RuALSUaAfXT+uFNVO5PcBjwCvARcXlUvAyS5AtgKLAA2VtXOKZuhJGlCE4ZBVT0IvH2U+hN0rh8cXP8h8L4xnusa4JpR6luALT2MV5I0DXwHsiTJMJAkGQaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSfQQBkmWJrk7ySNJdib5cKu/Psm2JI+3r8e1epJcn2QkyYNJTul6rjWt/+NJ1nTV35HkobbN9UkyHZOVJI2ulyODl4CPVNVyYAVweZLlwHrgzqpaBtzZ1gHOApa1xzrgM9AJD+BK4DTgVODKAwHS+lzatd2qyU9NktSrCcOgqp6pqv9oy/8DPAosBlYDm1q3TcB5bXk1cHN1bAeOTXICcCawrar2VdWzwDZgVWs7pqq2V1UBN3c9lySpDw7pmkGSQeDtwH3A8VX1TGv6LnB8W14MPN212a5WG6++a5T6aPtfl2Q4yfDevXsPZeiSpHH0HAZJXgP8A/CHVfV8d1t7RV9TPLafU1UbqmqoqoYGBgame3eSdMToKQySvJJOEPxdVf1jK3+vneKhfd3T6ruBpV2bL2m18epLRqlLkvqkl7uJAtwIPFpVn+xq2gwcuCNoDXB7V/3idlfRCmB/O520FViZ5Lh24XglsLW1PZ9kRdvXxV3PJUnqg4U99HkX8AHgoSTfaLU/A64FbkuyFngKuKC1bQHOBkaAF4BLAKpqX5KrgR2t31VVta8tXwbcBBwN3NEekqQ+mTAMqupfgbHu+z9jlP4FXD7Gc20ENo5SHwZOnmgskqTp4TuQJUmGgSTJMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJoocwSLIxyZ4kD3fVXp9kW5LH29fjWj1Jrk8ykuTBJKd0bbOm9X88yZqu+juSPNS2uT5JpnqSkqTx9XJkcBOw6qDaeuDOqloG3NnWAc4ClrXHOuAz0AkP4ErgNOBU4MoDAdL6XNq13cH7kiRNswnDoKruBfYdVF4NbGrLm4Dzuuo3V8d24NgkJwBnAtuqal9VPQtsA1a1tmOqantVFXBz13NJkvrkcK8ZHF9Vz7Tl7wLHt+XFwNNd/Xa12nj1XaPUJUl9NOkLyO0VfU3BWCaUZF2S4STDe/fu7ccuJemIcLhh8L12iof2dU+r
7waWdvVb0mrj1ZeMUh9VVW2oqqGqGhoYGDjMoUuSDna4YbAZOHBH0Brg9q76xe2uohXA/nY6aSuwMslx7cLxSmBra3s+yYp2F9HFXc8lSeqThRN1SPIl4N3AoiS76NwVdC1wW5K1wFPABa37FuBsYAR4AbgEoKr2Jbka2NH6XVVVBy5KX0bnjqWjgTvaQ5LURxOGQVVdNEbTGaP0LeDyMZ5nI7BxlPowcPJE45AkTR/fgSxJMgwkSYaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkiR7+D2RJM2Nw/ddGrT957Tl9HomOBB4ZSJIMA0mSp4mkeW+s003gKSf9lGEgHabx/sjOxH79w67J8DSRJMkwkCQZBpIkvGYgaQp4HWPu88hAkuSRgdQvvnrWbOaRgSTJIwPNX74Sn3oz9d4KTT+PDCRJHhlodunHq/nZdsTgq23NBrMmDJKsAj4FLAA+X1XXzvCQJE3SbAvewzEf5tCLWREGSRYANwDvBXYBO5JsrqpHZnZkkq/cNTP6HUKzIgyAU4GRqnoCIMktwGrAMJjj5tKrKv/o/5TfiyPPbAmDxcDTXeu7gNNmaCxHlEP9Rz9Vf8QPdb/+cZrYXPoeTfeLhLn0ImS2SFXN9BhIcj6wqqp+v61/ADitqq44qN86YF1b/WXgscPc5SLg+4e57VzlnOe/I22+4JwP1ZuqamC0htlyZLAbWNq1vqTVfkZVbQA2THZnSYaramiyzzOXOOf570ibLzjnqTRb3mewA1iW5KQkRwEXAptneEySdMSYFUcGVfVSkiuArXRuLd1YVTtneFiSdMSYFWEAUFVbgC192t2kTzXNQc55/jvS5gvOecrMigvIkqSZNVuuGUiSZtC8DoMkq5I8lmQkyfpR2l+V5NbWfl+SwRkY5pTpYb5/nOSRJA8muTPJm2ZinFNpojl39fvdJJVkzt950suck1zQftY7k3yx32Ocaj38bp+Y5O4kD7Tf77NnYpxTJcnGJHuSPDxGe5Jc374fDyY5ZdI7rap5+aBzIfpbwC8BRwHfBJYf1Ocy4LNt+ULg1pke9zTP9z3AL7TlD83l+fY659bvtcC9wHZgaKbH3Yef8zLgAeC4tv7GmR53H+a8AfhQW14OPDnT457knH8TOAV4eIz2s4E7gAArgPsmu8/5fGTwk4+4qKofAQc+4qLbamBTW/4ycEaS9HGMU2nC+VbV3VX1QlvdTuf9HHNZLz9jgKuBTwA/7Ofgpkkvc74UuKGqngWoqj19HuNU62XOBRzTll8H/HcfxzflqupeYN84XVYDN1fHduDYJCdMZp/zOQxG+4iLxWP1qaqXgP3AG/oyuqnXy3y7raXzymIum3DO7fB5aVXNnc9qGF8vP+e3AG9J8m9JtrdPBJ7LepnzXwDvT7KLzl2Jf9Cfoc2YQ/33PqFZc2up+ifJ+4Eh4LdmeizTKckrgE8CH5zhofTbQjqnit5N5+jv3iS/VlXPzeSgptlFwE1V9ddJ3gl8IcnJVfXjmR7YXDGfjwx6+YiLn/RJspDO4eUP+jK6qdfTR3ok+W3gz4Fzq+rFPo1tukw059cCJwP3JHmSzrnVzXP8InIvP+ddwOaq+r+q+jbwX3TCYa7qZc5rgdsAqurfgVfT+Qyf+aqnf++HYj6HQS8fcbEZWNOWzwfuqnZ1Zg6acL5J3g58jk4QzPXzyDDBnKtqf1UtqqrBqhqkc53k3KoanpnhTolefq//ic5RAUkW0Tlt9EQfxzjVepnzd4AzAJL8Kp0w2NvXUfbXZuDidlfRCmB/VT0zmSect6eJaoyPuEhyFTBcVZuBG+kcTo7QuVhz4cyNeHJ6nO9fAa8B/r5dJ/9OVZ07Y4OepB7nPK/0OOetwMokjwAvA39SVXP1iLfXOX8E+Nskf0TnYvIH5/ALO5J8iU6gL2rXQa4EXglQVZ+lc13kbGAEeAG4
ZNL7nMPfL0nSFJnPp4kkST0yDCRJhoEkyTCQJGEYSJIwDCRJGAaSJAwDSRLw/0KZS4q182AKAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.hist(((X-X.min())/(X.max()-X.min())).sample(frac=0.01).to_pandas(), bins=50)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Practice"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now it is your turn.<br><br>\n",
    "\n",
    "**ToDo**:\n",
    "<ul>\n",
    "<li>Normalize the features <code>price</code>, <code>TE_ts_weekday_ts_hour_cat_2_brand</code> and <code>CE_cat_2</code>.</li>\n",
    "<li>Which normalization technique seems best suited to each feature?</li>\n",
    "</ul>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "### ToDo"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Optimisation is skipped in this notebook."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We shut down the kernel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'status': 'ok', 'restart': False}"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "app = IPython.Application.instance()\n",
    "app.kernel.do_shutdown(False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
